easypackages::packages ("sf", "sp", "spdep", "spatialreg", "GWmodel", "tmap", "mapview", "car", "RColorBrewer",
"cowplot", "leafsync", "leaflet.extras2", "mapview", "tidyverse", "patchwork")
## Loading required package: sf
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
## Loading required package: sp
## Loading required package: spdep
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
## Loading required package: spatialreg
## Loading required package: Matrix
##
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
##
## get.ClusterOption, get.coresOption, get.mcOption,
## get.VerboseOption, get.ZeroPolicyOption, set.ClusterOption,
## set.coresOption, set.mcOption, set.VerboseOption,
## set.ZeroPolicyOption
## Loading required package: GWmodel
## Loading required package: robustbase
## Loading required package: Rcpp
## Welcome to GWmodel version 2.3-2.
## Loading required package: tmap
## Breaking News: tmap 3.x is retiring. Please test v4, e.g. with
## remotes::install_github('r-tmap/tmap')
## Loading required package: mapview
## Loading required package: car
## Loading required package: carData
## Loading required package: RColorBrewer
## Loading required package: cowplot
## Loading required package: leafsync
## Loading required package: leaflet.extras2
## Loading required package: leaflet
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::expand() masks Matrix::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tidyr::pack() masks Matrix::pack()
## ✖ dplyr::recode() masks car::recode()
## ✖ purrr::some() masks car::some()
## ✖ lubridate::stamp() masks cowplot::stamp()
## ✖ tidyr::unpack() masks Matrix::unpack()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: patchwork
##
##
## Attaching package: 'patchwork'
##
##
## The following object is masked from 'package:cowplot':
##
## align_plots
##
##
## All packages loaded successfully
project_directory <- dirname(getwd())
toronto_geojson_file <- file.path(project_directory, "data", "geodata", "neighbourhoods.geojson")
toronto <- st_read(toronto_geojson_file)
## Reading layer `neighbourhoods' from data source
## `/Users/michelleuni/Documents/UU/Spatial_Analysis/Term Project/data/geodata/neighbourhoods.geojson'
## using driver `GeoJSON'
## Simple feature collection with 140 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -79.63926 ymin: 43.581 xmax: -79.11527 ymax: 43.85546
## Geodetic CRS: WGS 84
# the AREA_NAME is the name of the neighbourhood
# we need to clean the text so it is the same as all the other files
toronto$AREA_NAME <- tolower(trimws(gsub("\\(\\d+\\)", "", toronto$AREA_NAME)))
toronto$AREA_NAME <- gsub("\\(\\d+\\)", "", toronto$AREA_NAME)
toronto$AREA_NAME <- gsub("[[:punct:]]", "", toronto$AREA_NAME)
names(toronto)[names(toronto) == "AREA_NAME"] <- "neighbourhood"
toronto$neighbourhood[which(toronto$neighbourhood == 'cabbagetownsouth stjames town')] <- 'cabbagetownsouth st james town'
toronto$neighbourhood[which(toronto$neighbourhood == 'north stjames town')] <- 'north st james town'
toronto <- toronto[order(toronto$neighbourhood), ]
toronto$neighbourhood
## [1] "agincourt north" "agincourt southmalvern west"
## [3] "alderwood" "annex"
## [5] "banburydon mills" "bathurst manor"
## [7] "bay street corridor" "bayview village"
## [9] "bayview woodssteeles" "bedford parknortown"
## [11] "beechboroughgreenbrook" "bendale"
## [13] "birchcliffecliffside" "black creek"
## [15] "blakejones" "briar hillbelgravia"
## [17] "bridle pathsunnybrookyork mills" "broadview north"
## [19] "brookhavenamesbury" "cabbagetownsouth st james town"
## [21] "caledoniafairbank" "casa loma"
## [23] "centennial scarborough" "churchyonge corridor"
## [25] "clairleabirchmount" "clanton park"
## [27] "cliffcrest" "corso italiadavenport"
## [29] "danforth" "danforth east york"
## [31] "don valley village" "dorset park"
## [33] "dovercourtwallace emersonjunction" "downsviewrodingcfb"
## [35] "dufferin grove" "east enddanforth"
## [37] "edenbridgehumber valley" "eglinton east"
## [39] "elmsold rexdale" "englemountlawrence"
## [41] "eringatecentennialwest deane" "etobicoke west mall"
## [43] "flemingdon park" "forest hill north"
## [45] "forest hill south" "glenfieldjane heights"
## [47] "greenwoodcoxwell" "guildwood"
## [49] "henry farm" "high park north"
## [51] "high parkswansea" "highland creek"
## [53] "hillcrest village" "humber heightswestmount"
## [55] "humber summit" "humbermede"
## [57] "humewoodcedarvale" "ionview"
## [59] "islingtoncity centre west" "junction area"
## [61] "keelesdaleeglinton west" "kennedy park"
## [63] "kensingtonchinatown" "kingsview villagethe westway"
## [65] "kingsway south" "lambton baby point"
## [67] "lamoreaux" "lansingwestgate"
## [69] "lawrence park north" "lawrence park south"
## [71] "leasidebennington" "little portugal"
## [73] "long branch" "malvern"
## [75] "maple leaf" "markland wood"
## [77] "milliken" "mimico includes humber bay shores"
## [79] "morningside" "moss park"
## [81] "mount dennis" "mount olivesilverstonejamestown"
## [83] "mount pleasant east" "mount pleasant west"
## [85] "new toronto" "newtonbrook east"
## [87] "newtonbrook west" "niagara"
## [89] "north riverdale" "north st james town"
## [91] "oakridge" "oakwood village"
## [93] "oconnorparkview" "old east york"
## [95] "palmerstonlittle italy" "parkwoodsdonalda"
## [97] "pelmo parkhumberlea" "playter estatesdanforth"
## [99] "pleasant view" "princessrosethorn"
## [101] "regent park" "rexdalekipling"
## [103] "rockcliffesmythe" "roncesvalles"
## [105] "rosedalemoore park" "rouge"
## [107] "runnymedebloor west village" "rustic"
## [109] "scarborough village" "south parkdale"
## [111] "south riverdale" "standrewwindfields"
## [113] "steeles" "stonegatequeensway"
## [115] "tam oshantersullivan" "taylormassey"
## [117] "the beaches" "thistletownbeaumond heights"
## [119] "thorncliffe park" "trinitybellwoods"
## [121] "university" "victoria village"
## [123] "waterfront communitiesthe island" "west hill"
## [125] "west humberclairville" "westminsterbranson"
## [127] "weston" "westonpelham park"
## [129] "wexfordmaryvale" "willowdale east"
## [131] "willowdale west" "willowridgemartingroverichview"
## [133] "woburn" "woodbine corridor"
## [135] "woodbinelumsden" "wychwood"
## [137] "yongeeglinton" "yongestclair"
## [139] "york university heights" "yorkdaleglen park"
toronto_map <- ggplot() +
geom_sf(data = toronto, fill = "lightblue") +
theme_void()
toronto_map
The census files contain 63 variables that will be used as indicators.
#census2011_path <- file.path(project_directory, "data", "census2011.csv")
#census2011 <- read.csv(census2011_path, row.names = 1)
census2016_path <- file.path(project_directory, "data", "census2016.csv")
census2016 <- read.csv(census2016_path, row.names = 1)
census2016 <- census2016[order(census2016$neighbourhood), ]
#census2021_path <- file.path(project_directory, "data", "census2021.csv")
#census2021 <- read.csv(census2021_path, row.names = 1)
names(census2016)
## [1] "neighbourhood" "single_detached_house"
## [3] "apart_5_plus" "other_dwelling"
## [5] "avg_household_size" "married"
## [7] "not_married" "single_parents"
## [9] "one_person_household" "two_plus_person_household"
## [11] "low_income_percent" "non_immigrants"
## [13] "immigrants" "owner"
## [15] "renter" "own_room"
## [17] "sharing_room" "suitable_housing"
## [19] "not_suitable_housing" "total_work_activity"
## [21] "unemployed" "employed"
## [23] "commute_drives" "commute_passenger"
## [25] "commute_public_transport" "commute_walk"
## [27] "commute_cycle" "commute_other"
## [29] "pop_0_to_4" "pop_5_to_9"
## [31] "pop_10_to_14" "pop_15_to_19"
## [33] "pop_20_to_24" "pop_25_to_29"
## [35] "pop_30_to_34" "pop_35_to_39"
## [37] "pop_40_to_44" "pop_45_to_49"
## [39] "pop_50_to_54" "pop_55_to_59"
## [41] "pop_60_to_64" "pop_65_to_69"
## [43] "pop_70_to_74" "pop_75_to_79"
## [45] "pop_80_to_84" "pop_85_to_89"
## [47] "pop_90_to_94" "pop_95_to_99"
## [49] "pop_100_plus" "household_income"
## [51] "income_under_5000" "income_between_5000_9999"
## [53] "income_between_10000_14999" "income_between_50000_59999"
## [55] "income_above_100k" "income_between_15000_19999"
## [57] "income_between_20000_29999" "income_between_30000_39999"
## [59] "income_between_40000_49999" "income_between_60000_79999"
## [61] "income_between_80000_99999" "no_certificate_diploma_degree"
## [63] "highschool_diploma" "post_secondary_diploma"
## [65] "total_population" "num_crimes"
toronto$area <- st_area(toronto)
#census2011$population_density <- census2011$total_population/toronto$area
census2016$population_density <- census2016$total_population/toronto$area
#census2021$population_density <- census2021$total_population/toronto$area
census2016_with_geom <- merge(census2016, toronto, by.x = "neighbourhood", by.y = "neighbourhood", all.x = TRUE)
census2016_sf <- st_as_sf(census2016_with_geom)
st_crs(census2016_sf) <- st_crs(toronto)
parks_geojson_file <- file.path(project_directory, "data", "geodata", "parks.geojson")
parks <- st_read(parks_geojson_file)
## Reading layer `parks' from data source
## `/Users/michelleuni/Documents/UU/Spatial_Analysis/Term Project/data/geodata/parks.geojson'
## using driver `GeoJSON'
## Simple feature collection with 1702 features and 82 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -79.62752 ymin: 43.581 xmax: -79.1242 ymax: 43.84753
## Geodetic CRS: WGS 84
names(parks)[names(parks) == "AREA_NAME"] <- "neighbourhood"
parks <- parks[order(parks$neighbourhood), ]
parks$neighbourhood <- tolower(parks$neighbourhood)
parks$neighbourhood <- gsub("[[:punct:]0-9]", "", parks$neighbourhood)
parks <- parks %>%
group_by(neighbourhood)
# Calculate the area of each park
parks <- parks %>%
mutate(park_area = st_area(geometry))
# Sum the area per neighbourhood
parks <- parks %>%
group_by(neighbourhood) %>%
summarise(total_park_area = sum(park_area))
# Add the neighbourhood area
parks <- parks %>%
mutate(neighbourhood_area = toronto$area)
# Calculate how much of the neighbourhood is a park
parks <- parks %>%
mutate(proportion = total_park_area/neighbourhood_area)
parks <- parks[order(parks$neighbourhood), ]
census2016_sf <- census2016_sf %>%
mutate(proportion_park_area = parks$proportion)
census2016_sf$proportion_park_area <- as.numeric(census2016_sf$proportion_park_area)
libraries_geojson_file <- file.path(project_directory, "data", "geodata", "libraries.geojson")
libraries<- st_read(libraries_geojson_file)
## Reading layer `libraries' from data source
## `/Users/michelleuni/Documents/UU/Spatial_Analysis/Term Project/data/geodata/libraries.geojson'
## using driver `GeoJSON'
## Simple feature collection with 108 features and 63 fields
## Geometry type: MULTIPOINT
## Dimension: XY
## Bounding box: xmin: -79.61925 ymin: 43.59905 xmax: -79.14005 ymax: 43.82411
## Geodetic CRS: WGS 84
names(libraries)[names(libraries) == "AREA_NAME"] <- "neighbourhood"
libraries <- libraries[order(libraries$neighbourhood), ]
# data cleaning
libraries$neighbourhood <- tolower(libraries$neighbourhood)
libraries$neighbourhood <- gsub("[[:punct:]0-9]", "", libraries$neighbourhood)
libraries$neighbourhood <- trimws(libraries$neighbourhood)
libraries$neighbourhood[which(libraries$neighbourhood == 'cabbagetownsouth stjames town')] <- 'cabbagetownsouth st james town'
libraries$neighbourhood[which(libraries$neighbourhood == 'north stjames town')] <- 'north st james town'
# group by neighbourhood to sum up number of libraries in each
libraries <- libraries %>%
group_by(neighbourhood) %>%
summarise(num_libraries = n())
# some neighbourhoods have no libraries, but we still need them in the data
missing_neighborhoods <- setdiff(toronto$neighbourhood, libraries$neighbourhood)
length(missing_neighborhoods)
## [1] 77
toronto_missing <- toronto[toronto$neighbourhood %in% missing_neighborhoods, ]
toronto_missing <- subset(toronto, neighbourhood %in% missing_neighborhoods, select = c("neighbourhood", "geometry"))
toronto_missing$num_libraries <- 0
# add the missing neighbourhoods
libraries <- rbind(libraries, toronto_missing)
libraries <- libraries[order(libraries$neighbourhood), ]
census2016_sf <- census2016_sf %>%
mutate(num_libraries = libraries$num_libraries)
police_geojson_file <- file.path(project_directory, "data", "geodata", "distance_to_police.geojson")
police <- st_read(police_geojson_file)
## Reading layer `distance_to_police' from data source
## `/Users/michelleuni/Documents/UU/Spatial_Analysis/Term Project/data/geodata/distance_to_police.geojson'
## using driver `GeoJSON'
## Simple feature collection with 140 features and 13 fields
## Geometry type: LINESTRING
## Dimension: XY
## Bounding box: xmin: -79.59636 ymin: 43.59236 xmax: -79.15084 ymax: 43.8212
## Geodetic CRS: WGS 84
names(police)[names(police) == "AREA_NAME"] <- "neighbourhood"
police <- police[order(police$neighbourhood), ]
police$neighbourhood <- tolower(police$neighbourhood)
police$neighbourhood <- gsub("[[:punct:]0-9]", "", police$neighbourhood)
police$neighbourhood <- trimws(police$neighbourhood)
police <- police[order(police$neighbourhood), ]
census2016_sf <- census2016_sf %>%
mutate(closest_police_facility = police$HubDist)
This shows the closest police facility for every neighbourhood (measured in km).
# Overlay libraries
police_plot <- toronto_map +
geom_sf(data = police, color = "yellow", size = 1) +
theme_void()
# Print the map
print(police_plot)
subway_geojson_file <- file.path(project_directory, "data", "geodata", "subway_stops.geojson")
subway <- st_read(subway_geojson_file)
## Reading layer `subway_stops' from data source
## `/Users/michelleuni/Documents/UU/Spatial_Analysis/Term Project/data/geodata/subway_stops.geojson'
## using driver `GeoJSON'
## Simple feature collection with 614 features and 37 fields
## Geometry type: MULTIPOINT
## Dimension: XY
## Bounding box: xmin: -79.53769 ymin: 43.63569 xmax: -79.26372 ymax: 43.78225
## Geodetic CRS: WGS 84
names(subway)[names(subway) == "AREA_NAME"] <- "neighbourhood"
subway$neighbourhood <- tolower(subway$neighbourhood)
subway$neighbourhood <- gsub("[[:punct:]0-9]", "", subway$neighbourhood)
subway$neighbourhood <- trimws(subway$neighbourhood)
subway$neighbourhood[which(subway$neighbourhood == 'cabbagetownsouth stjames town')] <- 'cabbagetownsouth st james town'
subway$neighbourhood[which(subway$neighbourhood == 'north stjames town')] <- 'north st james town'
# group by neighbourhood to sum up number of stops in each
subway <- subway %>%
group_by(neighbourhood) %>%
summarise(num_subway_stops = n())
# some neighbourhoods have no stops, but we still need them in the data
missing_neighborhoods2 <- setdiff(toronto$neighbourhood, subway$neighbourhood)
toronto_missing2 <- toronto[toronto$neighbourhood %in% missing_neighborhoods2, ]
toronto_missing2 <- subset(toronto, neighbourhood %in% missing_neighborhoods2, select = c("neighbourhood", "geometry"))
toronto_missing2$num_subway_stops <- 0
# add the missing neighbourhoods
subway <- rbind(subway, toronto_missing2)
subway <- subway[order(subway$neighbourhood), ]
census2016_sf <- census2016_sf %>%
mutate(num_subway_stops = subway$num_subway_stops)
cam_geojson_file <- file.path(project_directory, "data", "geodata", "cam2016.csv")
cameras <- read.csv(cam_geojson_file)
cameras <- cameras[order(cameras$neighbourhood), ]
cameras$neighbourhood[which(cameras$neighbourhood == 'cabbagetownsouth stjames town')] <- 'cabbagetownsouth st james town'
cameras$neighbourhood[which(cameras$neighbourhood == 'north stjames town')] <- 'north st james town'
# add geometries
subset_toronto <- toronto %>%
filter(neighbourhood %in% cameras$neighbourhood)
cameras <- cameras %>%
mutate(geometry = subset_toronto$geometry)
# some neighbourhoods have no stops, but we still need them in the data
missing_neighborhoods3 <- setdiff(toronto$neighbourhood, cameras$neighbourhood)
toronto_missing3 <- toronto[toronto$neighbourhood %in% missing_neighborhoods3, ]
toronto_missing3 <- subset(toronto, neighbourhood %in% missing_neighborhoods3, select = c("neighbourhood"))
toronto_missing3$num_cameras <- 0
cameras <- cameras[order(cameras$neighbourhood), ]
cameras <- rbind(cameras, toronto_missing3)
census2016_sf <- census2016_sf %>%
mutate(num_cameras = cameras$num_cameras)
# change fill to see different maps
ggplot() +
geom_sf(data = census2016_sf, aes(fill = num_crimes)) +
scale_fill_gradient(name = "Number of crimes", low = "grey", high = "red") +
theme_minimal()
#Continuity
crimedata_nbq <- poly2nb(census2016_sf, queen=TRUE)
crimedata_nbq_w <- nb2listw(crimedata_nbq, style="W", zero.policy = TRUE) #Queen’s
coordsW <- census2016_sf%>%
st_centroid()%>%
st_geometry()
## Warning: st_centroid assumes attributes are constant over geometries
#KNN
#crimedata_KNN <- knearneigh(coordsW, k=4)
#crimedata_nbq_KNN <- knn2nb(crimedata_KNN, sym=T)
#crimedata_KNN_w <- nb2listw(crimedata_nbq_KNN, style="W", zero.policy = TRUE)
plot(crimedata_nbq, st_geometry(coordsW), col="red")
mc_global <- moran.mc(census2016_sf$num_crimes, crimedata_nbq_w, 2999, alternative="greater")
#plot the Moran’s I
plot(mc_global)
mc_global
##
## Monte-Carlo simulation of Moran I
##
## data: census2016_sf$num_crimes
## weights: crimedata_nbq_w
## number of simulations + 1: 3000
##
## statistic = 0.34892, observed rank = 3000, p-value = 0.0003333
## alternative hypothesis: greater
#calculation
linear_regression_analysis <- function(equation, data) {
linearMod <- lm(equation, data = data)
summary_info <- summary(linearMod)
mc_global_OLS <- moran.mc(linearMod$residuals, crimedata_nbq_w, 2999, zero.policy = TRUE, alternative = "greater")
vif_values <- vif(linearMod)
# Save results
linear_results <- list(model = linearMod, summary = summary_info, vif = vif_values, moran=mc_global_OLS)
return(linear_results)
}
plot_residuals <- function(linear_results){
qtm(residuals(linear_results$linearMod))
}
# calculation
lisa_analysis <- function(data) {
gm_crime_LISA <- localmoran(data$num_crimes, crimedata_nbq_w)
summary_info <- summary(gm_crime_LISA)
# Save results
lisa_results <- list(gm_crime_LISA = gm_crime_LISA, summary = summary_info)
return(lisa_results)
}
# visualisation
visualise_LISA <- function(data, gm_crime_LISA) {
sf_object <- data
sf_object$gm_crime_LISA <- gm_crime_LISA[, 1]
p_values <- gm_crime_LISA[, 5]
map_LISA <- tm_shape(sf_object) +
tm_polygons(col = "gm_crime_LISA", title = "Local Moran’s I", midpoint = 0,
palette = "RdYlBu", breaks = c(-1, -0.5, 0, 0.5, 1, 2))
map_LISA_p <- tm_shape(sf_object) +
tm_polygons(col = "gm_crime_LISA", title = "p-values",
breaks = c(0, 0.01, 0.05, 1), palette = "Reds")
tmap_arrange(map_LISA, map_LISA_p)
}
# crime clusters
visualize_LISA_clusters <- function(data_sf, LISA_data, significanceLevel = 0.05, colors = c('red', 'brown', 'NA', 'blue', 'cyan')) {
# Calculate mean value of the variable under consideration
meanVal <- mean(data_sf$num_crimes)
# Create a copy of LISA data to preserve original results
LISA_type <- LISA_data
# Create a tibble object storing the LISA values into different classes of significance
LISA_mapping <- LISA_type %>%
tibble::as_tibble() %>%
magrittr::set_colnames(c("Ii", "E.Ii", "Var.Ii", "Z.Ii", "Pr(z > 0)")) %>%
dplyr::mutate(coType = dplyr::case_when(
`Pr(z > 0)` > significanceLevel ~ "Insignificant", # Greater than significance level, thus Insignificant
`Pr(z > 0)` <= significanceLevel & Ii >= 0 & data_sf$num_crimes >= meanVal ~ "HH", # High-High local Moran's I values
`Pr(z > 0)` <= significanceLevel & Ii >= 0 & data_sf$num_crimes < meanVal ~ "LL", # Low-Low local Moran's I values
`Pr(z > 0)` <= significanceLevel & Ii < 0 & data_sf$num_crimes >= meanVal ~ "HL", # High-Low local Moran's I values
`Pr(z > 0)` <= significanceLevel & Ii < 0 & data_sf$num_crimes < meanVal ~ "LH" # Low-High local Moran's I values
))
# Join back to the main polygon data
data_sf$coType <- LISA_mapping$coType %>% tidyr::replace_na("Insignificant")
# Map the clusters
ggplot(data_sf) +
geom_sf(aes(fill = coType)) +
scale_fill_manual(values = colors, name = 'Clusters & \nOutliers') +
labs(title = "Crime clusters") +
theme(panel.background = element_rect(fill = 'transparent'))
}
# calculation
gwr_analysis <- function(equation, data, i) {
crimedata2016_sp <- as_Spatial(data)
print(paste("---Equation", i, "---"))
abw <- bw.gwr(equation, approach = "AIC", adaptive = TRUE, kernel = "gaussian", data = crimedata2016_sp)
a.gwr <- gwr.basic(equation, adaptive = TRUE, kernel = "gaussian", bw = abw, data = crimedata2016_sp)
# Save results
gwr_results <- list(a.gwr = a.gwr)
return(gwr_results)
}
# visualisation
extract_predictors <- function(equation) {
predictors <- attr(terms(equation), "term.labels")
return(predictors)
}
visualise_gwr <- function(agwr_sf, equation) {
predictors <- extract_predictors(equation)
maps <- lapply(predictors, function(predictor) {
qtm_map <- qtm(agwr_sf, predictor)
return(qtm_map)
})
tmap_arrange(maps, asp = 5, ncol = 1)
}
# only social indicators
eq1 <- num_crimes ~ unemployed + household_income + no_certificate_diploma_degree + commute_walk
eq2 <- num_crimes ~ unemployed + household_income + no_certificate_diploma_degree + commute_public_transport
eq3 <- num_crimes ~ unemployed + household_income + no_certificate_diploma_degree + commute_public_transport + single_parents
# only physical indicators - change these to include Asha's physical indicators
eq4 <- num_crimes ~ num_cameras + proportion_park_area + closest_police_facility
eq5 <- num_crimes ~ population_density + proportion_park_area + closest_police_facility + num_subway_stops
eq6 <- num_crimes ~ proportion_park_area + closest_police_facility + num_cameras
# both social and physical indicators
eq7 <- num_crimes ~ unemployed + + commute_public_transport + population_density + proportion_park_area
eq8 <- num_crimes ~ household_income + no_certificate_diploma_degree + num_cameras + population_density
eq9 <- num_crimes ~ low_income_percent + population_density + num_subway_stops + no_certificate_diploma_degree
eq10 <- num_crimes ~ proportion_park_area + unemployed + population_density + no_certificate_diploma_degree + household_income
# List of equations
equations <- list(eq1, eq2, eq3, eq4, eq5, eq6, eq7, eq8, eq9, eq10)
linear_results_list <- list()
lisa_results_list <- list()
gwr_results_list <- list()
# Perform analysis for each equation
for (i in seq_along(equations)) {
eq <- equations[[i]]
# Linear Regression Analysis
linear_results_list[[i]] <- linear_regression_analysis(eq, census2016_sf)
# LISA Analysis
lisa_results_list[[i]] <- lisa_analysis(census2016_sf)
# GWR Analysis
gwr_results_list[[i]] <- gwr_analysis(eq, census2016_sf, i)
}
## [1] "---Equation 1 ---"
## Adaptive bandwidth (number of nearest neighbours): 94 AICc value: 1760.241
## Adaptive bandwidth (number of nearest neighbours): 66 AICc value: 1756.92
## Adaptive bandwidth (number of nearest neighbours): 48 AICc value: 1753.423
## Adaptive bandwidth (number of nearest neighbours): 37 AICc value: 1750.522
## Adaptive bandwidth (number of nearest neighbours): 30 AICc value: 1748.303
## Adaptive bandwidth (number of nearest neighbours): 26 AICc value: 1747.261
## Adaptive bandwidth (number of nearest neighbours): 23 AICc value: 1746.78
## Adaptive bandwidth (number of nearest neighbours): 21 AICc value: 1746.737
## Adaptive bandwidth (number of nearest neighbours): 20 AICc value: 1747.365
## Adaptive bandwidth (number of nearest neighbours): 22 AICc value: 1746.459
## Adaptive bandwidth (number of nearest neighbours): 22 AICc value: 1746.459
## [1] "---Equation 2 ---"
## Adaptive bandwidth (number of nearest neighbours): 94 AICc value: 1783.573
## Adaptive bandwidth (number of nearest neighbours): 66 AICc value: 1780.546
## Adaptive bandwidth (number of nearest neighbours): 48 AICc value: 1776.387
## Adaptive bandwidth (number of nearest neighbours): 37 AICc value: 1772.109
## Adaptive bandwidth (number of nearest neighbours): 30 AICc value: 1767.095
## Adaptive bandwidth (number of nearest neighbours): 26 AICc value: 1764.025
## Adaptive bandwidth (number of nearest neighbours): 23 AICc value: 1762.681
## Adaptive bandwidth (number of nearest neighbours): 21 AICc value: 1762.711
## Adaptive bandwidth (number of nearest neighbours): 24 AICc value: 1763.131
## Adaptive bandwidth (number of nearest neighbours): 22 AICc value: 1762.553
## Adaptive bandwidth (number of nearest neighbours): 22 AICc value: 1762.553
## [1] "---Equation 3 ---"
## Adaptive bandwidth (number of nearest neighbours): 94 AICc value: 1781.222
## Adaptive bandwidth (number of nearest neighbours): 66 AICc value: 1775.519
## Adaptive bandwidth (number of nearest neighbours): 48 AICc value: 1767.925
## Adaptive bandwidth (number of nearest neighbours): 37 AICc value: 1761.35
## Adaptive bandwidth (number of nearest neighbours): 30 AICc value: 1756.199
## Adaptive bandwidth (number of nearest neighbours): 26 AICc value: 1753.178
## Adaptive bandwidth (number of nearest neighbours): 23 AICc value: 1752.493
## Adaptive bandwidth (number of nearest neighbours): 21 AICc value: 1753.059
## Adaptive bandwidth (number of nearest neighbours): 24 AICc value: 1752.79
## Adaptive bandwidth (number of nearest neighbours): 22 AICc value: 1752.547
## Adaptive bandwidth (number of nearest neighbours): 23 AICc value: 1752.493
## [1] "---Equation 4 ---"
## Adaptive bandwidth (number of nearest neighbours): 94 AICc value: 1904.455
## Adaptive bandwidth (number of nearest neighbours): 66 AICc value: 1903.733
## Adaptive bandwidth (number of nearest neighbours): 48 AICc value: 1900.686
## Adaptive bandwidth (number of nearest neighbours): 37 AICc value: 1897.09
## Adaptive bandwidth (number of nearest neighbours): 30 AICc value: 1894.362
## Adaptive bandwidth (number of nearest neighbours): 26 AICc value: 1892.383
## Adaptive bandwidth (number of nearest neighbours): 23 AICc value: 1891.444
## Adaptive bandwidth (number of nearest neighbours): 21 AICc value: 1891.096
## Adaptive bandwidth (number of nearest neighbours): 20 AICc value: 1891.498
## Adaptive bandwidth (number of nearest neighbours): 22 AICc value: 1891.047
## Adaptive bandwidth (number of nearest neighbours): 22 AICc value: 1891.047
## [1] "---Equation 5 ---"
## Adaptive bandwidth (number of nearest neighbours): 94 AICc value: 1857.355
## Adaptive bandwidth (number of nearest neighbours): 66 AICc value: 1856.905
## Adaptive bandwidth (number of nearest neighbours): 48 AICc value: 1855.55
## Adaptive bandwidth (number of nearest neighbours): 37 AICc value: 1854.73
## Adaptive bandwidth (number of nearest neighbours): 30 AICc value: 1853.745
## Adaptive bandwidth (number of nearest neighbours): 26 AICc value: 1853.508
## Adaptive bandwidth (number of nearest neighbours): 23 AICc value: 1853.501
## Adaptive bandwidth (number of nearest neighbours): 21 AICc value: 1853.679
## Adaptive bandwidth (number of nearest neighbours): 24 AICc value: 1853.7
## Adaptive bandwidth (number of nearest neighbours): 22 AICc value: 1853.185
## Adaptive bandwidth (number of nearest neighbours): 22 AICc value: 1853.185
## [1] "---Equation 6 ---"
## Adaptive bandwidth (number of nearest neighbours): 94 AICc value: 1904.455
## Adaptive bandwidth (number of nearest neighbours): 66 AICc value: 1903.733
## Adaptive bandwidth (number of nearest neighbours): 48 AICc value: 1900.686
## Adaptive bandwidth (number of nearest neighbours): 37 AICc value: 1897.09
## Adaptive bandwidth (number of nearest neighbours): 30 AICc value: 1894.362
## Adaptive bandwidth (number of nearest neighbours): 26 AICc value: 1892.383
## Adaptive bandwidth (number of nearest neighbours): 23 AICc value: 1891.444
## Adaptive bandwidth (number of nearest neighbours): 21 AICc value: 1891.096
## Adaptive bandwidth (number of nearest neighbours): 20 AICc value: 1891.498
## Adaptive bandwidth (number of nearest neighbours): 22 AICc value: 1891.047
## Adaptive bandwidth (number of nearest neighbours): 22 AICc value: 1891.047
## [1] "---Equation 7 ---"
## Adaptive bandwidth (number of nearest neighbours): 94 AICc value: 1824.117
## Adaptive bandwidth (number of nearest neighbours): 66 AICc value: 1819.372
## Adaptive bandwidth (number of nearest neighbours): 48 AICc value: 1813.101
## Adaptive bandwidth (number of nearest neighbours): 37 AICc value: 1806.292
## Adaptive bandwidth (number of nearest neighbours): 30 AICc value: 1799.875
## Adaptive bandwidth (number of nearest neighbours): 26 AICc value: 1794.906
## Adaptive bandwidth (number of nearest neighbours): 23 AICc value: 1791.349
## Adaptive bandwidth (number of nearest neighbours): 21 AICc value: 1788.752
## Adaptive bandwidth (number of nearest neighbours): 20 AICc value: 1787.138
## Adaptive bandwidth (number of nearest neighbours): 19 AICc value: 1784.808
## Adaptive bandwidth (number of nearest neighbours): 19 AICc value: 1784.808
## [1] "---Equation 8 ---"
## Adaptive bandwidth (number of nearest neighbours): 94 AICc value: 1784.154
## Adaptive bandwidth (number of nearest neighbours): 66 AICc value: 1781.083
## Adaptive bandwidth (number of nearest neighbours): 48 AICc value: 1777.658
## Adaptive bandwidth (number of nearest neighbours): 37 AICc value: 1775.712
## Adaptive bandwidth (number of nearest neighbours): 30 AICc value: 1773.202
## Adaptive bandwidth (number of nearest neighbours): 26 AICc value: 1771.712
## Adaptive bandwidth (number of nearest neighbours): 23 AICc value: 1770.57
## Adaptive bandwidth (number of nearest neighbours): 21 AICc value: 1770.53
## Adaptive bandwidth (number of nearest neighbours): 20 AICc value: 1770.348
## Adaptive bandwidth (number of nearest neighbours): 19 AICc value: 1769.09
## Adaptive bandwidth (number of nearest neighbours): 19 AICc value: 1769.09
## [1] "---Equation 9 ---"
## Adaptive bandwidth (number of nearest neighbours): 94 AICc value: 1840.426
## Adaptive bandwidth (number of nearest neighbours): 66 AICc value: 1839.856
## Adaptive bandwidth (number of nearest neighbours): 48 AICc value: 1838.918
## Adaptive bandwidth (number of nearest neighbours): 37 AICc value: 1838.712
## Adaptive bandwidth (number of nearest neighbours): 30 AICc value: 1838.271
## Adaptive bandwidth (number of nearest neighbours): 26 AICc value: 1838.123
## Adaptive bandwidth (number of nearest neighbours): 23 AICc value: 1838.247
## Adaptive bandwidth (number of nearest neighbours): 27 AICc value: 1838.167
## Adaptive bandwidth (number of nearest neighbours): 24 AICc value: 1838.087
## Adaptive bandwidth (number of nearest neighbours): 24 AICc value: 1838.087
## [1] "---Equation 10 ---"
## Adaptive bandwidth (number of nearest neighbours): 94 AICc value: 1779.979
## Adaptive bandwidth (number of nearest neighbours): 66 AICc value: 1776.633
## Adaptive bandwidth (number of nearest neighbours): 48 AICc value: 1772.516
## Adaptive bandwidth (number of nearest neighbours): 37 AICc value: 1768.819
## Adaptive bandwidth (number of nearest neighbours): 30 AICc value: 1765.395
## Adaptive bandwidth (number of nearest neighbours): 26 AICc value: 1763.111
## Adaptive bandwidth (number of nearest neighbours): 23 AICc value: 1762.292
## Adaptive bandwidth (number of nearest neighbours): 21 AICc value: 1762.3
## Adaptive bandwidth (number of nearest neighbours): 24 AICc value: 1762.521
## Adaptive bandwidth (number of nearest neighbours): 22 AICc value: 1761.99
## Adaptive bandwidth (number of nearest neighbours): 22 AICc value: 1761.99
# equation 1
linear_results_list[[1]]
## $model
##
## Call:
## lm(formula = equation, data = data)
##
## Coefficients:
## (Intercept) unemployed
## 15.435032 -0.002092
## household_income no_certificate_diploma_degree
## 0.012563 0.037000
## commute_walk
## 0.057229
##
##
## $summary
##
## Call:
## lm(formula = equation, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -433.27 -55.99 -16.01 25.81 717.82
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.435032 25.374382 0.608 0.544016
## unemployed -0.002092 0.009019 -0.232 0.816917
## household_income 0.012563 0.006286 1.999 0.047651 *
## no_certificate_diploma_degree 0.037000 0.010403 3.557 0.000519 ***
## commute_walk 0.057229 0.010883 5.259 5.54e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 129.5 on 135 degrees of freedom
## Multiple R-squared: 0.6571, Adjusted R-squared: 0.6469
## F-statistic: 64.67 on 4 and 135 DF, p-value: < 2.2e-16
##
##
## $vif
## unemployed household_income
## 6.880903 7.525150
## no_certificate_diploma_degree commute_walk
## 3.171722 3.880383
##
## $moran
##
## Monte-Carlo simulation of Moran I
##
## data: linearMod$residuals
## weights: crimedata_nbq_w
## number of simulations + 1: 3000
##
## statistic = 0.076061, observed rank = 2874, p-value = 0.042
## alternative hypothesis: greater
# equation 2
linear_results_list[[2]]
## $model
##
## Call:
## lm(formula = equation, data = data)
##
## Coefficients:
## (Intercept) unemployed
## -21.52553 -0.02597
## household_income no_certificate_diploma_degree
## 0.05018 0.04327
## commute_public_transport
## -0.03021
##
##
## $summary
##
## Call:
## lm(formula = equation, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -309.39 -59.17 -22.17 27.73 743.16
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -21.525534 26.117903 -0.824 0.411298
## unemployed -0.025968 0.008486 -3.060 0.002670 **
## household_income 0.050176 0.006080 8.253 1.24e-13 ***
## no_certificate_diploma_degree 0.043272 0.011463 3.775 0.000239 ***
## commute_public_transport -0.030208 0.014509 -2.082 0.039234 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 140 on 135 degrees of freedom
## Multiple R-squared: 0.5997, Adjusted R-squared: 0.5878
## F-statistic: 50.56 on 4 and 135 DF, p-value: < 2.2e-16
##
##
## $vif
## unemployed household_income
## 5.218472 6.030366
## no_certificate_diploma_degree commute_public_transport
## 3.298805 5.143706
##
## $moran
##
## Monte-Carlo simulation of Moran I
##
## data: linearMod$residuals
## weights: crimedata_nbq_w
## number of simulations + 1: 3000
##
## statistic = 0.18061, observed rank = 2997, p-value = 0.001
## alternative hypothesis: greater
# equation 3
linear_results_list[[3]]
## $model
##
## Call:
## lm(formula = equation, data = data)
##
## Coefficients:
## (Intercept) unemployed
## -22.27344 -0.02150
## household_income no_certificate_diploma_degree
## 0.04995 0.05335
## commute_public_transport single_parents
## -0.02757 -0.05376
##
##
## $summary
##
## Call:
## lm(formula = equation, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -303.72 -67.72 -18.41 28.95 739.01
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -22.273444 26.145779 -0.852 0.39579
## unemployed -0.021502 0.009786 -2.197 0.02972 *
## household_income 0.049952 0.006088 8.205 1.68e-13 ***
## no_certificate_diploma_degree 0.053353 0.015877 3.360 0.00101 **
## commute_public_transport -0.027570 0.014799 -1.863 0.06466 .
## single_parents -0.053759 0.058546 -0.918 0.36015
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 140 on 134 degrees of freedom
## Multiple R-squared: 0.6022, Adjusted R-squared: 0.5873
## F-statistic: 40.57 on 5 and 134 DF, p-value: < 2.2e-16
##
##
## $vif
## unemployed household_income
## 6.930897 6.040073
## no_certificate_diploma_degree commute_public_transport
## 6.321437 5.345046
## single_parents
## 10.593496
##
## $moran
##
## Monte-Carlo simulation of Moran I
##
## data: linearMod$residuals
## weights: crimedata_nbq_w
## number of simulations + 1: 3000
##
## statistic = 0.19135, observed rank = 2996, p-value = 0.001333
## alternative hypothesis: greater
# equation 4
linear_results_list[[4]]
## $model
##
## Call:
## lm(formula = equation, data = data)
##
## Coefficients:
## (Intercept) num_cameras proportion_park_area
## 367.832 -4.988 -256.239
## closest_police_facility
## -39.488
##
##
## $summary
##
## Call:
## lm(formula = equation, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -232.04 -127.14 -53.54 40.88 1040.75
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 367.832 42.919 8.570 2.01e-14 ***
## num_cameras -4.988 12.918 -0.386 0.70000
## proportion_park_area -256.239 215.370 -1.190 0.23621
## closest_police_facility -39.488 14.355 -2.751 0.00676 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 212.4 on 136 degrees of freedom
## Multiple R-squared: 0.07146, Adjusted R-squared: 0.05098
## F-statistic: 3.489 on 3 and 136 DF, p-value: 0.01757
##
##
## $vif
## num_cameras proportion_park_area closest_police_facility
## 1.012933 1.037584 1.028877
##
## $moran
##
## Monte-Carlo simulation of Moran I
##
## data: linearMod$residuals
## weights: crimedata_nbq_w
## number of simulations + 1: 3000
##
## statistic = 0.33258, observed rank = 3000, p-value = 0.0003333
## alternative hypothesis: greater
# equation 5
linear_results_list[[5]]
## $model
##
## Call:
## lm(formula = equation, data = data)
##
## Coefficients:
## (Intercept) population_density proportion_park_area
## 198.752 9.584 -14.275
## closest_police_facility num_subway_stops
## -14.284 1.812
##
##
## $summary
##
## Call:
## lm(formula = equation, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -550.21 -87.32 -36.26 56.68 922.38
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 198.752 40.865 4.864 3.16e-06 ***
## population_density 9.584 1.334 7.182 4.18e-11 ***
## proportion_park_area -14.275 185.389 -0.077 0.939
## closest_police_facility -14.284 12.631 -1.131 0.260
## num_subway_stops 1.812 1.177 1.540 0.126
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 179.7 on 135 degrees of freedom
## Multiple R-squared: 0.34, Adjusted R-squared: 0.3204
## F-statistic: 17.39 on 4 and 135 DF, p-value: 1.579e-11
##
##
## $vif
## population_density proportion_park_area closest_police_facility
## 1.123477 1.073673 1.112365
## num_subway_stops
## 1.032180
##
## $moran
##
## Monte-Carlo simulation of Moran I
##
## data: linearMod$residuals
## weights: crimedata_nbq_w
## number of simulations + 1: 3000
##
## statistic = 0.19558, observed rank = 2999, p-value = 0.0003333
## alternative hypothesis: greater
# equation 6
linear_results_list[[6]]
## $model
##
## Call:
## lm(formula = equation, data = data)
##
## Coefficients:
## (Intercept) proportion_park_area closest_police_facility
## 367.832 -256.239 -39.488
## num_cameras
## -4.988
##
##
## $summary
##
## Call:
## lm(formula = equation, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -232.04 -127.14 -53.54 40.88 1040.75
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 367.832 42.919 8.570 2.01e-14 ***
## proportion_park_area -256.239 215.370 -1.190 0.23621
## closest_police_facility -39.488 14.355 -2.751 0.00676 **
## num_cameras -4.988 12.918 -0.386 0.70000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 212.4 on 136 degrees of freedom
## Multiple R-squared: 0.07146, Adjusted R-squared: 0.05098
## F-statistic: 3.489 on 3 and 136 DF, p-value: 0.01757
##
##
## $vif
## proportion_park_area closest_police_facility num_cameras
## 1.037584 1.028877 1.012933
##
## $moran
##
## Monte-Carlo simulation of Moran I
##
## data: linearMod$residuals
## weights: crimedata_nbq_w
## number of simulations + 1: 3000
##
## statistic = 0.33258, observed rank = 3000, p-value = 0.0003333
## alternative hypothesis: greater
# equation 7
linear_results_list[[7]]
## $model
##
## Call:
## lm(formula = equation, data = data)
##
## Coefficients:
## (Intercept) unemployed commute_public_transport
## 26.83523 0.01141 0.03689
## population_density proportion_park_area
## 5.39234 -70.31357
##
##
## $summary
##
## Call:
## lm(formula = equation, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -430.52 -66.64 -22.01 25.82 814.00
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 26.835225 37.207572 0.721 0.472015
## unemployed 0.011410 0.005957 1.915 0.057544 .
## commute_public_transport 0.036894 0.012524 2.946 0.003795 **
## population_density 5.392337 1.524416 3.537 0.000555 ***
## proportion_park_area -70.313569 166.291306 -0.423 0.673089
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 163.9 on 135 degrees of freedom
## Multiple R-squared: 0.4508, Adjusted R-squared: 0.4346
## F-statistic: 27.71 on 4 and 135 DF, p-value: < 2.2e-16
##
##
## $vif
## unemployed commute_public_transport population_density
## 1.874388 2.793738 1.761982
## proportion_park_area
## 1.038220
##
## $moran
##
## Monte-Carlo simulation of Moran I
##
## data: linearMod$residuals
## weights: crimedata_nbq_w
## number of simulations + 1: 3000
##
## statistic = 0.29882, observed rank = 3000, p-value = 0.0003333
## alternative hypothesis: greater
# equation 8
linear_results_list[[8]]
## $model
##
## Call:
## lm(formula = equation, data = data)
##
## Coefficients:
## (Intercept) household_income
## -19.82803 0.02625
## no_certificate_diploma_degree num_cameras
## 0.01469 -8.85584
## population_density
## 3.64098
##
##
## $summary
##
## Call:
## lm(formula = equation, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -406.07 -52.73 -14.07 30.77 748.92
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -19.828030 27.215403 -0.729 0.46753
## household_income 0.026250 0.003344 7.851 1.14e-12 ***
## no_certificate_diploma_degree 0.014688 0.006988 2.102 0.03741 *
## num_cameras -8.855844 8.794282 -1.007 0.31574
## population_density 3.640975 1.255088 2.901 0.00434 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 142 on 135 degrees of freedom
## Multiple R-squared: 0.5879, Adjusted R-squared: 0.5757
## F-statistic: 48.16 on 4 and 135 DF, p-value: < 2.2e-16
##
##
## $vif
## household_income no_certificate_diploma_degree
## 1.771998 1.191055
## num_cameras population_density
## 1.050118 1.591810
##
## $moran
##
## Monte-Carlo simulation of Moran I
##
## data: linearMod$residuals
## weights: crimedata_nbq_w
## number of simulations + 1: 3000
##
## statistic = 0.22648, observed rank = 3000, p-value = 0.0003333
## alternative hypothesis: greater
# equation 9
linear_results_list[[9]]
## $model
##
## Call:
## lm(formula = equation, data = data)
##
## Coefficients:
## (Intercept) low_income_percent
## 45.55742 2.31358
## population_density num_subway_stops
## 8.94888 2.12519
## no_certificate_diploma_degree
## 0.02907
##
##
## $summary
##
## Call:
## lm(formula = equation, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -510.91 -68.76 -16.35 29.98 980.26
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 45.557423 40.309422 1.130 0.260399
## low_income_percent 2.313579 1.981853 1.167 0.245114
## population_density 8.948876 1.252485 7.145 5.09e-11 ***
## num_subway_stops 2.125189 1.098550 1.935 0.055138 .
## no_certificate_diploma_degree 0.029068 0.008018 3.625 0.000408 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 169.6 on 135 degrees of freedom
## Multiple R-squared: 0.4118, Adjusted R-squared: 0.3944
## F-statistic: 23.63 on 4 and 135 DF, p-value: 7.975e-15
##
##
## $vif
## low_income_percent population_density
## 1.181203 1.110509
## num_subway_stops no_certificate_diploma_degree
## 1.009740 1.098619
##
## $moran
##
## Monte-Carlo simulation of Moran I
##
## data: linearMod$residuals
## weights: crimedata_nbq_w
## number of simulations + 1: 3000
##
## statistic = 0.19267, observed rank = 3000, p-value = 0.0003333
## alternative hypothesis: greater
# equation 10
linear_results_list[[10]]
## $model
##
## Call:
## lm(formula = equation, data = data)
##
## Coefficients:
## (Intercept) proportion_park_area
## -9.02102 -93.41696
## unemployed population_density
## -0.02447 3.32138
## no_certificate_diploma_degree household_income
## 0.03905 0.03447
##
##
## $summary
##
## Call:
## lm(formula = equation, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -344.93 -53.85 -15.00 28.11 730.09
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.021021 30.412083 -0.297 0.767211
## proportion_park_area -93.416965 140.333806 -0.666 0.506762
## unemployed -0.024474 0.008401 -2.913 0.004195 **
## population_density 3.321381 1.237831 2.683 0.008211 **
## no_certificate_diploma_degree 0.039047 0.011129 3.509 0.000614 ***
## household_income 0.034469 0.004207 8.194 1.78e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 138.4 on 134 degrees of freedom
## Multiple R-squared: 0.6113, Adjusted R-squared: 0.5968
## F-statistic: 42.15 on 5 and 134 DF, p-value: < 2.2e-16
##
##
## $vif
## proportion_park_area unemployed
## 1.036856 5.228300
## population_density no_certificate_diploma_degree
## 1.629149 3.178776
## household_income
## 2.951207
##
## $moran
##
## Monte-Carlo simulation of Moran I
##
## data: linearMod$residuals
## weights: crimedata_nbq_w
## number of simulations + 1: 3000
##
## statistic = 0.18295, observed rank = 2997, p-value = 0.001
## alternative hypothesis: greater
census2016_sf$residuals1 <- residuals(linear_results_list[[1]]$model)
census2016_sf$residuals2 <- residuals(linear_results_list[[2]]$model)
census2016_sf$residuals3 <- residuals(linear_results_list[[3]]$model)
census2016_sf$residuals4 <- residuals(linear_results_list[[4]]$model)
census2016_sf$residuals5 <- residuals(linear_results_list[[5]]$model)
qtm(census2016_sf, "residuals1")
## Variable(s) "residuals1" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
qtm(census2016_sf, "residuals2")
## Variable(s) "residuals2" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
qtm(census2016_sf, "residuals3")
## Variable(s) "residuals3" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
qtm(census2016_sf, "residuals4")
## Variable(s) "residuals4" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
qtm(census2016_sf, "residuals5")
## Variable(s) "residuals5" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
census2016_sf$residuals6 <- residuals(linear_results_list[[6]]$model)
census2016_sf$residuals7 <- residuals(linear_results_list[[7]]$model)
census2016_sf$residuals8 <- residuals(linear_results_list[[8]]$model)
census2016_sf$residuals9 <- residuals(linear_results_list[[9]]$model)
census2016_sf$residuals10 <- residuals(linear_results_list[[10]]$model)
qtm(census2016_sf, "residuals6")
## Variable(s) "residuals6" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
qtm(census2016_sf, "residuals7")
## Variable(s) "residuals7" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
qtm(census2016_sf, "residuals8")
## Variable(s) "residuals8" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
qtm(census2016_sf, "residuals9")
## Variable(s) "residuals9" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
qtm(census2016_sf, "residuals10")
## Variable(s) "residuals10" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
visualise_LISA(census2016_sf, lisa_results_list[[1]]$gm_crime_LISA)
## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are less than the lowest break
## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are less than the lowest break
## Warning: Values have found that are higher than the highest break
visualise_LISA(census2016_sf, lisa_results_list[[2]]$gm_crime_LISA)
## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are less than the lowest break
## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are less than the lowest break
## Warning: Values have found that are higher than the highest break
visualise_LISA(census2016_sf, lisa_results_list[[3]]$gm_crime_LISA)
## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are less than the lowest break
## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are less than the lowest break
## Warning: Values have found that are higher than the highest break
visualise_LISA(census2016_sf, lisa_results_list[[4]]$gm_crime_LISA)
## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are less than the lowest break
## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are less than the lowest break
## Warning: Values have found that are higher than the highest break
visualise_LISA(census2016_sf, lisa_results_list[[5]]$gm_crime_LISA)
## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are less than the lowest break
## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are less than the lowest break
## Warning: Values have found that are higher than the highest break
visualise_LISA(census2016_sf, lisa_results_list[[6]]$gm_crime_LISA)
## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are less than the lowest break
## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are less than the lowest break
## Warning: Values have found that are higher than the highest break
visualise_LISA(census2016_sf, lisa_results_list[[7]]$gm_crime_LISA)
## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are less than the lowest break
## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are less than the lowest break
## Warning: Values have found that are higher than the highest break
visualise_LISA(census2016_sf, lisa_results_list[[8]]$gm_crime_LISA)
## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are less than the lowest break
## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are less than the lowest break
## Warning: Values have found that are higher than the highest break
visualise_LISA(census2016_sf, lisa_results_list[[9]]$gm_crime_LISA)
## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are less than the lowest break
## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are less than the lowest break
## Warning: Values have found that are higher than the highest break
visualise_LISA(census2016_sf, lisa_results_list[[10]]$gm_crime_LISA)
## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are less than the lowest break
## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are higher than the highest break
## Warning: Values have found that are less than the lowest break
## Warning: Values have found that are higher than the highest break
visualize_LISA_clusters(census2016_sf, lisa_results_list[[1]]$gm_crime_LISA, significanceLevel = 0.05, colors = c('red', 'brown', 'NA', 'blue', 'cyan'))
visualize_LISA_clusters(census2016_sf, lisa_results_list[[2]]$gm_crime_LISA, significanceLevel = 0.05, colors = c('red', 'brown', 'NA', 'blue', 'cyan'))
visualize_LISA_clusters(census2016_sf, lisa_results_list[[3]]$gm_crime_LISA, significanceLevel = 0.05, colors = c('red', 'brown', 'NA', 'blue', 'cyan'))
visualize_LISA_clusters(census2016_sf, lisa_results_list[[4]]$gm_crime_LISA, significanceLevel = 0.05, colors = c('red', 'brown', 'NA', 'blue', 'cyan'))
visualize_LISA_clusters(census2016_sf, lisa_results_list[[5]]$gm_crime_LISA, significanceLevel = 0.05, colors = c('red', 'brown', 'NA', 'blue', 'cyan'))
visualize_LISA_clusters(census2016_sf, lisa_results_list[[6]]$gm_crime_LISA, significanceLevel = 0.05, colors = c('red', 'brown', 'NA', 'blue', 'cyan'))
visualize_LISA_clusters(census2016_sf, lisa_results_list[[7]]$gm_crime_LISA, significanceLevel = 0.05, colors = c('red', 'brown', 'NA', 'blue', 'cyan'))
visualize_LISA_clusters(census2016_sf, lisa_results_list[[8]]$gm_crime_LISA, significanceLevel = 0.05, colors = c('red', 'brown', 'NA', 'blue', 'cyan'))
visualize_LISA_clusters(census2016_sf, lisa_results_list[[9]]$gm_crime_LISA, significanceLevel = 0.05, colors = c('red', 'brown', 'NA', 'blue', 'cyan'))
visualize_LISA_clusters(census2016_sf, lisa_results_list[[10]]$gm_crime_LISA, significanceLevel = 0.05, colors = c('red', 'brown', 'NA', 'blue', 'cyan'))
# Plot first 5 GWR results for each equation
for (i in seq_along(gwr_results_list[1:5])) {
gwr_result <- gwr_results_list[[i]]
equation <- equations[[i]]
# Extract GWR spatial data frame
agwr_sf <- st_as_sf(gwr_result$a.gwr$SDF)
# Visualize GWR result
gwr_plot <- visualise_gwr(agwr_sf, equation)
# Print GWR plot
print(gwr_plot)
}
## Variable(s) "unemployed" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "household_income" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "no_certificate_diploma_degree" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "unemployed" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "no_certificate_diploma_degree" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "commute_public_transport" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "unemployed" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "commute_public_transport" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "single_parents" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "num_cameras" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "proportion_park_area" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "closest_police_facility" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "proportion_park_area" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "closest_police_facility" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "num_subway_stops" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
for (i in seq_along(gwr_results_list)[6:10]) {
gwr_result <- gwr_results_list[[i]]
equation <- equations[[i]]
# Extract GWR spatial data frame
agwr_sf <- st_as_sf(gwr_result$a.gwr$SDF)
# Visualize GWR result
gwr_plot <- visualise_gwr(agwr_sf, equation)
# Print GWR plot
print(gwr_plot)
}
## Variable(s) "proportion_park_area" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "closest_police_facility" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "num_cameras" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "commute_public_transport" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "population_density" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "proportion_park_area" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "no_certificate_diploma_degree" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "num_cameras" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "population_density" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "low_income_percent" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "proportion_park_area" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "unemployed" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "population_density" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "no_certificate_diploma_degree" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.